// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Cummins Chris PhD student at The University of Edinburgh.
// Contact: <eigen3@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

/*****************************************************************
 * TensorSyclRun.h
 *
 * \brief:
 * Schedule_kernel invokes a specialised version of the kernel struct. The
 * specialisation is based on the data dimension in the sycl buffer.
 *
 *****************************************************************/

#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP

namespace Eigen
{
namespace TensorSycl
{
    /// The run function in tensor sycl convert the expression tree to a buffer
    /// based expression tree;
    /// creates the expression tree for the device with accessor to buffers;
    /// construct the kernel and submit it to the sycl queue.
    template<typename Expr, typename Dev>
    void run(Expr& expr, Dev& dev)
    {
        Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev);
        const bool                        needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
        if ( needs_assign ) {
            typedef typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr;
            auto                                                               functors = internal::extractFunctors(evaluator);

            size_t tileSize = dev.m_queue.get_device().template get_info<cl::sycl::info::device::max_work_group_size>() / 2;
            dev.m_queue.submit([&](cl::sycl::handler& cgh) {
                // create a tuple of accessors from Evaluator
                auto       tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
                const auto range              = utility::tuple::get<0>(tuple_of_accessors).get_range()[0];
                size_t     GRange             = range;
                if ( tileSize > GRange )
                    tileSize = GRange;
                else if ( GRange > tileSize ) {
                    size_t xMode = GRange % tileSize;
                    if ( xMode != 0 ) GRange += (tileSize - xMode);
                }
                // run the kernel
                cgh.parallel_for<PlaceHolderExpr>(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) {
                    typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr;
                    auto                                                             device_expr      = internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
                    auto                                                             device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice());
                    if ( itemID.get_global_linear_id() < range ) {
                        device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id()));
                    }
                });
            });
            dev.m_queue.throw_asynchronous();
        }

        evaluator.cleanup();
    }
}   // namespace TensorSycl
}   // namespace Eigen

#endif   // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
